In [1]:
import pandas as pd
import plotly as plt
import numpy as np
import plotly.express as px
In [2]:
# Read the wide tick table, convert the timestamps to Istanbul time, drop the
# timezone information, and use the result as the index.
data = (
    pd.read_csv('all_ticks_wide.csv')
    .assign(
        timestamp=lambda ticks: pd.to_datetime(ticks['timestamp'])
        .dt.tz_convert('Europe/Istanbul')
        .dt.tz_localize(None)
    )
    .set_index('timestamp')
)

# Symbols analysed in the rest of the notebook
stocks = ['ASELS', 'THYAO', 'PGSUS', 'BANVT', 'MGROS', 'FROTO']
In [3]:
# Restrict the tick data to the analysis window (calendar years 2015-2016 by default)

def data_preparation(data, stocks, first_day='2014-12-31', last_day='2017-01-01'):
    """Return the rows of ``data`` strictly between two bounds, limited to ``stocks``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index can be compared against date strings (a
        DatetimeIndex in this notebook).
    stocks : list of str
        Column labels to keep, in the order given.
    first_day : str, optional
        Exclusive lower bound. The bound is compared as a timestamp, so the
        default ``'2014-12-31'`` (midnight) still admits ticks recorded later
        on 2014-12-31.
    last_day : str, optional
        Exclusive upper bound; the default ``'2017-01-01'`` drops everything
        from 2017 onwards.

    Returns
    -------
    pandas.DataFrame
        The selected rows and columns of ``data``.
    """
    in_window = (data.index > first_day) & (data.index < last_day)
    return data.loc[in_window, stocks]
In [4]:
# Daily closing price and day-over-day percent change for one stock

def stocks_data(symbol, data):
    """Build a per-day table of closes and percent changes for ``symbol``.

    Parameters
    ----------
    symbol : str
        Column of ``data`` holding the prices of the stock.
    data : pandas.DataFrame
        Tick data indexed by a DatetimeIndex (``data.index.date`` is used to
        group ticks into calendar days).

    Returns
    -------
    pandas.DataFrame
        One row per calendar day present in ``data``, indexed by
        ``datetime.date`` objects, with columns:

        * ``Close``  - last value of the day as returned by groupby ``'last'``
        * ``Month``  - month number of the day
        * ``Year``   - year of the day
        * ``Symbol`` - ``symbol`` repeated on every row
        * ``Change`` - percent change of ``Close`` versus the previous row
          (NaN on the first row)
    """
    # The close of a day is the last value recorded on that calendar day.
    daily_close = data.groupby([data.index.date])[symbol].agg('last')

    # Parsed copy of the day labels, used only to read month/year numbers;
    # the returned frame keeps the date-object index produced by the groupby.
    days = pd.to_datetime(daily_close.index)

    df = pd.DataFrame({'Close': daily_close})
    df['Month'] = days.month
    df['Year'] = days.year
    df['Symbol'] = symbol

    previous_close = df['Close'].shift(1)
    df['Change'] = (df['Close'] - previous_close) / previous_close * 100
    return df
In [5]:
# Outlier detection with the IQR rule, plotting each month's data along the way

def outliers(outliers_df, data, i, j, symbol_name, end_year=2017, plot_fn=None):
    """Collect monthly IQR outliers of the daily change and plot every month.

    Starting at month ``i`` of year ``j`` and moving forward one calendar
    month at a time until ``end_year`` is reached, the rows of ``data`` whose
    ``'Change'`` falls outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` (quartiles
    computed within that month) are gathered and added after the rows of
    ``outliers_df``.

    Parameters
    ----------
    outliers_df : pandas.DataFrame
        Outliers collected so far (may be empty).
    data : pandas.DataFrame
        Frame with at least the columns ``'Month'``, ``'Year'`` and
        ``'Change'``.
    i : int
        Month number to start from.
    j : int
        Year to start from.
    symbol_name : str
        Passed through to the plotting callable (used as the chart title by
        the notebook's ``plot``).
    end_year : int, optional
        First year that is *not* processed. Defaults to 2017.
    plot_fn : callable, optional
        Called as ``plot_fn(monthly_df, symbol_name)`` for every non-empty
        month. Defaults to the notebook-level ``plot`` function.

    Returns
    -------
    pandas.DataFrame
        ``outliers_df`` followed by the newly flagged rows; ``outliers_df``
        itself when nothing new was flagged.
    """
    if plot_fn is None:
        # Looked up at call time so the cell defining ``plot`` may come later.
        plot_fn = plot

    month, year = i, j
    flagged_months = []
    while year < end_year:
        monthly_df = data[(data['Month'] == month) & (data['Year'] == year)]

        # A month without rows has nothing to flag, and the plotting helper
        # cannot label an empty time range, so it is skipped.
        if not monthly_df.empty:
            change = monthly_df['Change']
            q1 = change.quantile(0.25)
            q3 = change.quantile(0.75)
            iqr = q3 - q1
            upper = q3 + 1.5 * iqr
            lower = q1 - 1.5 * iqr

            flagged = monthly_df[(change > upper) | (change < lower)]
            if not flagged.empty:
                flagged_months.append(flagged)

            # The data is only split per month here, so plotting happens here too.
            plot_fn(monthly_df, symbol_name)

        if month >= 12:
            month, year = 1, year + 1
        else:
            month += 1

    if not flagged_months:
        return outliers_df

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    # An empty accumulator is left out so it cannot influence column dtypes.
    pieces = flagged_months if outliers_df.empty else [outliers_df, *flagged_months]
    return pd.concat(pieces)
In [6]:
# Line chart of the daily percent change for one slice of data

def plot(data, symbol_name):
    """Show a plotly line chart of ``data['Change']`` against ``data.index``.

    ``symbol_name`` becomes the chart title. The x-axis label carries the
    first and last index value of ``data``, so ``data`` must contain at least
    one row.
    """
    chart = px.line(x=data.index, y=data['Change'], title=symbol_name, markers=True)

    # Span covered by this slice, e.g. "(first/last)", appended to the axis label
    first_label = str(data.index[0])
    last_label = str(data.index[-1])
    x_label = "Time Horizon" + ' (' + first_label + '/' + last_label + ')'

    chart.update_layout(
        xaxis_title=x_label,
        yaxis_title="Daily Percent Change of Closes",
    )
    chart.show()
In [7]:
# Keep only the selected symbols inside the analysis window.
six_stocks_data = data_preparation(data, stocks)

# Accumulator: the outlier rows of every stock end up in this one dataframe.
outliers_df = pd.DataFrame()

# For each symbol, build its daily close/change table, then scan it month by
# month starting from January 2015. outliers() also draws one chart per month
# as a side effect, so running this cell produces many figures.
for k in stocks:
    last_data = stocks_data(k, six_stocks_data)
    outliers_df = outliers(outliers_df, last_data, 1, 2015, k)
In [8]:
# Display the outlier rows collected for every symbol in `stocks`.
outliers_df
Out[8]:
Close Month Year Symbol Change
2015-03-16 5.9430 3 2015 ASELS 3.405077
2015-06-08 6.2186 6 2015 ASELS -8.361332
2015-06-24 7.1810 6 2015 ASELS 5.054495
2015-09-15 6.8355 9 2015 ASELS 4.922638
2015-10-16 7.0576 10 2015 ASELS -4.026544
... ... ... ... ... ...
2016-09-16 25.3739 9 2016 FROTO -4.224135
2016-09-22 26.8713 9 2016 FROTO 3.353936
2016-09-26 25.9334 9 2016 FROTO -3.312592
2016-12-01 23.5737 12 2016 FROTO -3.742706
2016-12-06 25.3389 12 2016 FROTO 5.438166

173 rows × 5 columns